StatsBomb - For match event data
soccermatics - Pitch plots
ggsoccer - Pitch plots
install.packages("ggsoccer")
Error in install.packages : Updating loaded packages
install.packages("tidyverse")
Installing package into ‘C:/Users/kusha/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/tidyverse_2.0.0.zip'
Content type 'application/zip' length 430846 bytes (420 KB)
downloaded 420 KB
package ‘tidyverse’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\kusha\AppData\Local\Temp\Rtmpuy3JWL\downloaded_packages
install.packages("ggsoccer")
Installing package into ‘C:/Users/kusha/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/ggsoccer_0.1.7.zip'
Content type 'application/zip' length 288927 bytes (282 KB)
downloaded 282 KB
package ‘ggsoccer’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\kusha\AppData\Local\Temp\Rtmpuy3JWL\downloaded_packages
install.packages("ggplot2")
Error in install.packages : Updating loaded packages
install.packages("here")
Installing package into ‘C:/Users/kusha/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/here_1.0.1.zip'
Content type 'application/zip' length 64152 bytes (62 KB)
downloaded 62 KB
package ‘here’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\kusha\AppData\Local\Temp\Rtmpuy3JWL\downloaded_packages
install.packages("devtools")
Error in install.packages : Updating loaded packages
install.packages("remotes")
Error in install.packages : Updating loaded packages
devtools::install_github("cran/SDMTools")
Skipping install of 'SDMTools' from a github remote, the SHA1 (4f193c85) has not changed since last install.
Use `force = TRUE` to force installation
devtools::install_github("statsbomb/StatsBombR")
Skipping install of 'StatsBombR' from a github remote, the SHA1 (f0503b80) has not changed since last install.
Use `force = TRUE` to force installation
install.packages("remotes")
Installing package into ‘C:/Users/kusha/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/remotes_2.4.2.1.zip'
Content type 'application/zip' length 398944 bytes (389 KB)
downloaded 389 KB
package ‘remotes’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\kusha\AppData\Local\Temp\Rtmpuy3JWL\downloaded_packages
install.packages("devtools")
Installing package into ‘C:/Users/kusha/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/devtools_2.4.5.zip'
Content type 'application/zip' length 436170 bytes (425 KB)
downloaded 425 KB
package ‘devtools’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\kusha\AppData\Local\Temp\Rtmpuy3JWL\downloaded_packages
install.packages("ggplot2")
Error in install.packages : Updating loaded packages
devtools::install_github("jogall/soccermatics")
Skipping install of 'soccermatics' from a github remote, the SHA1 (4dfbfebc) has not changed since last install.
Use `force = TRUE` to force installation
install.packages("ggplot2")
Installing package into ‘C:/Users/kusha/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/ggplot2_3.4.4.zip'
Content type 'application/zip' length 4297844 bytes (4.1 MB)
downloaded 4.1 MB
package ‘ggplot2’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\kusha\AppData\Local\Temp\Rtmpuy3JWL\downloaded_packages
library(ggsoccer)
Warning: package ‘ggsoccer’ was built under R version 4.3.2
library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.3.2Warning: package ‘ggplot2’ was built under R version 4.3.2── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.3 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.4 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.0
✔ purrr 1.0.2 ── Conflicts ────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ tidyr::pack() masks Matrix::pack()
✖ dplyr::recode() masks arules::recode()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(here)
Warning: package ‘here’ was built under R version 4.3.2here() starts at D:/Football-Analytics
library(ggplot2)
library(StatsBombR)
Loading required package: stringi
Loading required package: rvest
Attaching package: ‘rvest’
The following object is masked from ‘package:readr’:
guess_encoding
Loading required package: RCurl
Attaching package: ‘RCurl’
The following object is masked from ‘package:tidyr’:
complete
Loading required package: doParallel
Loading required package: foreach
Attaching package: ‘foreach’
The following objects are masked from ‘package:purrr’:
accumulate, when
Loading required package: iterators
Loading required package: parallel
Loading required package: httr
Loading required package: jsonlite
Attaching package: ‘jsonlite’
The following object is masked from ‘package:purrr’:
flatten
Loading required package: sp
The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, were retired in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
Warning: replacing previous import ‘jsonlite::flatten’ by ‘purrr::flatten’ when loading ‘StatsBombR’Warning: replacing previous import ‘foreach::when’ by ‘purrr::when’ when loading ‘StatsBombR’Warning: replacing previous import ‘foreach::accumulate’ by ‘purrr::accumulate’ when loading ‘StatsBombR’
library(soccermatics)
FreeCompetitions()
[1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please credit StatsBomb as your data source when using the data and visit https://statsbomb.com/media-pack/ to obtain our logos for public use."
comps <- FreeCompetitions() %>%
filter(competition_id==43 & season_name=="2022")
[1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please credit StatsBomb as your data source when using the data and visit https://statsbomb.com/media-pack/ to obtain our logos for public use."
comps
matches <- FreeMatches(Competitions = comps)
[1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please credit StatsBomb as your data source when using the data and visit https://statsbomb.com/media-pack/ to obtain our logos for public use."
matches
events <- get.matchFree(matches[10,])
[1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please credit StatsBomb as your data source when using the data and visit https://statsbomb.com/media-pack/ to obtain our logos for public use."
events
cleanevents = allclean(events)
Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(id)`Joining with `by = join_by(period, match_id)`
cleanevents
# Converting player name for representation purpose
cleanevents <- cleanevents %>%
mutate(player.name = ifelse(player.name == 'Lionel Andrés Messi Cuccittini', 'Lionel Messi', player.name))
# Player credentials
player_id <- 5503
player_name <- 'Lionel Messi'
process_period <- function(half) {
filtered_events <- cleanevents %>%
filter(type.name == 'Pass' & period==half & player.id==player_id) %>%
select(location, pass.end_location)
locationX <- lapply(filtered_events$location, function(x) x[[1]])
locationY <- lapply(filtered_events$location, function(x) x[[2]])
endLocationX <- lapply(filtered_events$pass.end_location, function(x) x[[1]])
endLocationY <- lapply(filtered_events$pass.end_location, function(x) x[[2]])
filtered_passes <- data.frame(
LocationX = unlist(locationX),
LocationY = unlist(locationY),
EndLocationX = unlist(endLocationX),
EndLocationY = unlist(endLocationY)
)
return (filtered_passes)
}
filtered_passes <- process_period(1)
to_statsbomb <- rescale_coordinates(from = pitch_opta, to = pitch_statsbomb)
passes_rescaled <- data.frame(x = to_statsbomb$x(filtered_passes$LocationX),
y = to_statsbomb$y(filtered_passes$LocationY),
x2 = to_statsbomb$x(filtered_passes$EndLocationX),
y2 = to_statsbomb$y(filtered_passes$EndLocationY))
ggplot(passes_rescaled) +
annotate_pitch(dimensions = pitch_statsbomb, , colour = "white", fill = "springgreen4") +
geom_segment(aes(x = x, y = y, xend = x2, yend = y2),
colour = "coral",
arrow = arrow(length = unit(0.25, "cm"),
type = "closed")) +
geom_point(aes(x = x, y = y),
colour = "yellow",
size = 4) +
theme_pitch() +
direction_label(x_label = 60) +
theme(panel.background = element_rect(fill = "springgreen4"))+
ggtitle("PassMap",
"Messi in the First Half (Normal time)")
filtered_passes <- process_period(2)
to_statsbomb <- rescale_coordinates(from = pitch_opta, to = pitch_statsbomb)
passes_rescaled <- data.frame(x = to_statsbomb$x(filtered_passes$LocationX),
y = to_statsbomb$y(filtered_passes$LocationY),
x2 = to_statsbomb$x(filtered_passes$EndLocationX),
y2 = to_statsbomb$y(filtered_passes$EndLocationY))
ggplot(passes_rescaled) +
annotate_pitch(dimensions = pitch_statsbomb, , colour = "white", fill = "springgreen4") +
geom_segment(aes(x = x, y = y, xend = x2, yend = y2),
colour = "coral",
arrow = arrow(length = unit(0.25, "cm"),
type = "closed")) +
geom_point(aes(x = x, y = y),
colour = "yellow",
size = 4) +
theme_pitch() +
direction_label(x_label = 60) +
theme(panel.background = element_rect(fill = "springgreen4"))+
ggtitle("PassMap",
"Messi in the Second Half (Normal time)")
filtered_passes <- process_period(3)
to_statsbomb <- rescale_coordinates(from = pitch_opta, to = pitch_statsbomb)
passes_rescaled <- data.frame(x = to_statsbomb$x(filtered_passes$LocationX),
y = to_statsbomb$y(filtered_passes$LocationY),
x2 = to_statsbomb$x(filtered_passes$EndLocationX),
y2 = to_statsbomb$y(filtered_passes$EndLocationY))
ggplot(passes_rescaled) +
annotate_pitch(dimensions = pitch_statsbomb, , colour = "white", fill = "springgreen4") +
geom_segment(aes(x = x, y = y, xend = x2, yend = y2),
colour = "coral",
arrow = arrow(length = unit(0.25, "cm"),
type = "closed")) +
geom_point(aes(x = x, y = y),
colour = "yellow",
size = 4) +
theme_pitch() +
direction_label(x_label = 60) +
theme(panel.background = element_rect(fill = "springgreen4"))+
ggtitle("PassMap",
"Messi in the First Half (Extra time)")
filtered_passes <- process_period(4)
to_statsbomb <- rescale_coordinates(from = pitch_opta, to = pitch_statsbomb)
passes_rescaled <- data.frame(x = to_statsbomb$x(filtered_passes$LocationX),
y = to_statsbomb$y(filtered_passes$LocationY),
x2 = to_statsbomb$x(filtered_passes$EndLocationX),
y2 = to_statsbomb$y(filtered_passes$EndLocationY))
ggplot(passes_rescaled) +
annotate_pitch(dimensions = pitch_statsbomb, , colour = "white", fill = "springgreen4") +
geom_segment(aes(x = x, y = y, xend = x2, yend = y2),
colour = "coral",
arrow = arrow(length = unit(0.25, "cm"),
type = "closed")) +
geom_point(aes(x = x, y = y),
colour = "yellow",
size = 4) +
theme_pitch() +
direction_label(x_label = 60) +
theme(panel.background = element_rect(fill = "springgreen4"))+
ggtitle("PassMap",
"Messi in the Second Half (Extra time)")
cleanevents %>%
filter(player.id==player_id & type.name == "Pass") %>%
soccerTransform(method='statsbomb') %>%
soccerPositionMap(id = "player.name", x = "location.x", y = "location.y",
fill1 = "blue",
arrow = "r",
theme = "grass",
title = "Average Pass Postition",
subtitle='Messi vs France')
cleanevents %>%
filter(player.id == player_id) %>%
soccerPassmap(fill = "lightblue", arrow = "r", theme='grass',
title = "Highest Pass Position")
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `pass.outcome.name = fct_explicit_na(pass.outcome.name, "Complete")`.
Caused by warning:
! `fct_explicit_na()` was deprecated in forcats 1.0.0.
ℹ Please use `fct_na_value_to_level()` instead.
ℹ The deprecated feature was likely used in the soccermatics package.
Please report the issue to the authors.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
cleanevents %>%
filter(team.name=='Argentina' & type.name == "Pass") %>%
soccerTransform(method='statsbomb') %>%
soccerPositionMap(id = "player.name", x = "location.x", y = "location.y",
fill1 = "blue",
arrow = "r",
theme = "grass",
title = "Average Pass Postition",
subtitle='Argentina')
summary <- cleanevents %>%
group_by("Team"=cleanevents$team.name) %>%
summarise(shots = sum(type.name=="Shot", na.rm = TRUE))
summary
filtered_events <- cleanevents %>%
filter(team.name == 'Argentina' & type.name == 'Shot') %>%
select(player.name)
shots <- filtered_events %>%
group_by(player.name) %>%
summarise(shots = n())
print(shots)
cleanevents %>%
filter(player.id==player_id) %>%
soccerTransform(method='statsbomb') %>%
soccerShotmap(theme = "grass", title = "ShotMap",
subtitle = "Shots taken by Messi")
cleanevents %>%
filter(player.id==player_id & type.name == "Shot") %>%
soccerTransform(method='statsbomb') %>%
soccerPositionMap(id = "player.name", x = "location.x", y = "location.y",
fill1 = "blue",
arrow = "r",
theme = "grass",
title = "Average Shot Postition",
subtitle = 'Messi vs France')
cleanevents %>%
filter(player.id==player_id) %>%
soccerTransform(method='statsbomb') %>%
soccerHeatmap(x = "location.x", y = "location.y", xBins = 21, yBins = 14,
title = "HeatMap",
subtitle = 'Messi vs France')
data <- cleanevents %>%
select(duration, player.name)
filtered_events_model <- cleanevents %>%
filter(team.name=='Argentina') %>%
select(duration, player.name)
data_clean <- filtered_events_model[complete.cases(filtered_events_model$duration, filtered_events_model$player.name), ]
print(data_clean)
duration_data <- data.frame(total_duration = double(), player_name = character(), stringsAsFactors = FALSE)
process_period <- function(player_name) {
filtered_events <- data_clean %>%
filter(player.name==player_name) %>%
select(duration)
total_duration_player <- sum(filtered_events$duration, na.rm = TRUE)
new_row <- data.frame(total_duration = total_duration_player, player_name = player_name, stringsAsFactors = FALSE)
return (new_row)
}
unique_players <- unique(data_clean$player.name)
for (player_name in unique_players) {
new_row <- process_period(player_name)
duration_data <- rbind(duration_data, new_row)
}
print(duration_data)
#Total shots by each player
shots <- cleanevents %>%
filter(team.name == 'Argentina' & type.name == 'Shot') %>%
group_by(player.name) %>%
summarise(shots = n())
# Create a data frame with all player names
all_players <- data.frame(player.name = unique(cleanevents$player.name[cleanevents$team.name == 'Argentina']))
# Left join with shots summary, replace NA with 0
merged_data <- left_join(all_players, shots, by = "player.name") %>%
mutate(shots = ifelse(is.na(shots), 0, shots))
# Print the result
print(merged_data)
NA
# Assuming merged_data and duration_data are your data frames
# merged_data contains columns player.name and shots
# duration_data contains columns player_name and total_duration
# Left join merged_data with duration_data
final_merged_data <- left_join(merged_data, duration_data, by = c("player.name" = "player_name"))
# If needed, replace NA values in shots and total_duration with 0
final_merged_data$shots[is.na(final_merged_data$shots)] <- 0
final_merged_data$total_duration[is.na(final_merged_data$total_duration)] <- 0
# Print the final merged data
print(final_merged_data)
NA
NA
# Filter and count passes
passes <- cleanevents %>%
filter(team.name == 'Argentina' & type.name == 'Pass') %>%
group_by(player.name) %>%
summarise(passes = n())
# Merge with the existing final_merged_data
final_merged_data <- left_join(final_merged_data, passes, by = c("player.name" = "player.name"))
# If needed, replace NA values in passes with 0
final_merged_data$passes[is.na(final_merged_data$passes)] <- 0
# Print the final merged data
print(final_merged_data)
NA
selected_columns <- final_merged_data[, c("shots", "total_duration", "passes")]
# Standardize the data
scaled_data <- scale(selected_columns)
k <- 3
kmeans_result <- kmeans(scaled_data, centers = k, nstart = 20)
# Add the cluster assignments back to the original data frame
final_merged_data$cluster <- kmeans_result$cluster
# Print the cluster centroids
print(kmeans_result$centers)
shots total_duration passes
1 9.251859e-17 -1.1440579 -1.106724
2 -5.703224e-01 0.2931336 0.311266
3 1.140645e+00 1.1298197 1.037553
# View the distribution of players in each cluster
table(final_merged_data$cluster)
1 2 3
6 8 4
library(ggplot2)
# Scatter plot of total_duration against shots, colored by cluster, with player names
ggplot(final_merged_data, aes(x = total_duration, y = shots, color = factor(cluster), label = player.name)) +
geom_point(aes(size = passes)) +
geom_text(nudge_x = 0.2, nudge_y = 0.2, size = 3) + # Adjust nudge values as needed
labs(title = "K-Means Clustering of Players", x = "Total Duration", y = "Shots", color = "Cluster") +
theme_minimal()
# Draw circles around each cluster
cluster_centers <- as.data.frame(kmeans_result$centers[, c("total_duration", "shots")])
library(ggplot2)
# Scatter plot of total_duration against shots, colored by cluster, with player names
ggplot(final_merged_data, aes(x = passes, y = shots, color = factor(cluster), label = player.name)) +
geom_point(aes(size = total_duration)) +
geom_text(nudge_x = 0.2, nudge_y = 0.2, size = 3) + # Adjust nudge values as needed
labs(title = "K-Means Clustering of Players", x = "Passes", y = "Shots", color = "Cluster") +
theme_minimal()
# Draw circles around each cluster
cluster_centers <- as.data.frame(kmeans_result$centers[, c("passes", "shots")])
library(plotly)
# Create a 3D scatter plot
p <- plot_ly(final_merged_data, x = ~total_duration, y = ~shots, z = ~passes, color = ~factor(cluster)) %>%
add_markers(size = 2) %>%
# Add labels for player names
add_text(text = ~player.name,
showlegend = FALSE,
hoverinfo = "text",
size = 1)
# Show the plot
p
NA
NA
NA